package com.czk.java;

import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;

/**
 * @Author:ChenZhangKun
 * @Date: 2021/12/14 9:55
 */
/**
 * Word-count example driven two ways: via a Spark SQL query against a
 * global temp view, and via the equivalent Dataset (DataFrame) API calls.
 */
public class JavaSparkSql {
    public static void main(String[] args) {
        // Create the SparkSession — the entry point for Spark SQL — running locally.
        SparkSession sparkSession = SparkSession.builder().appName("java").master("local[*]").getOrCreate();
        sparkSession.sparkContext().setLogLevel("WARN");

        // Read the input file as a Dataset of lines.
        Dataset<String> ds = sparkSession.read().textFile("F:\\IdeaProjects\\bigData\\spark\\datas\\test.txt");

        // Split each line on single spaces into individual words.
        Dataset<String> wordsDs = ds.flatMap((String line) -> Arrays.asList(line.split(" ")).iterator(), Encoders.STRING());

        wordsDs.createOrReplaceGlobalTempView("t_word");

        // FIX: the original query was broken three ways — the concatenated
        // fragments had no separating spaces ("countsfrom t_wordsgroup by ..."),
        // it referenced "t_words" while the view is registered as "t_word",
        // and a GLOBAL temp view must be qualified with the "global_temp"
        // database, otherwise Spark cannot resolve the table.
        String sql = "select value, count(*) as counts " +
                "from global_temp.t_word " +
                "group by value " +
                "order by counts desc";
        sparkSession.sql(sql).show();

        // Same aggregation expressed with the DataFrame API:
        // group by the implicit "value" column, count, sort descending.
        Dataset<Row> temp = wordsDs.groupBy("value").count();
        temp.orderBy(temp.col("count").desc()).show();

        // Release the local cluster's resources before the JVM exits.
        sparkSession.stop();
    }
}
